# Format data for Andreu's analyses.
# Goal: the total number of tweets and retweets per day for each account,
# regardless of topic.



library(tidyverse)


# Load the tweet-level table from disk.
# Every column is parsed as text unless it is given an explicit type below.
master <- read_csv(
  "results-data/tweets_with_topic_probs.csv",
  col_types = cols(
    .default = col_character(),
    # Text columns (same as the default; listed to be explicit)
    name = col_character(),
    tweet_text = col_character(),
    images_list = col_character(),
    video_id = col_character(),
    # is_valid = col_character(),
    # Integer columns
    num_followers = col_integer(),
    initial_retweets = col_integer(),
    initial_favorites = col_integer(),
    delayed_retweets = col_integer(),
    gun_control = col_integer(),
    # NOTE(review): parsed as double while the sibling count columns are
    # integers -- confirm this is intentional.
    delayed_favorites = col_double()
  )
)

# Daily summary: one row per (name, tweet_dayfloor) combination in `master`.
#   total_daily_tweets      - number of rows in the group
#   total_daily_initial_rts - sum of initial_retweets over the group
#   avg_daily_followers     - mean of num_followers over the group
# sum() and mean() are called without na.rm, so a missing value anywhere in
# a group makes that group's result NA.
# summarize() only drops the last grouping variable, which would leave the
# result grouped by `name`; ungroup() returns a plain tibble so later
# operations on `daily` are not silently applied per account.
daily <- master %>% 
  group_by(name, tweet_dayfloor) %>% 
  summarize(total_daily_tweets = n(),
            total_daily_initial_rts = sum(initial_retweets),
            avg_daily_followers = mean(num_followers)) %>% 
  ungroup()

# Save the per-account daily summary as a CSV file
daily %>% 
  write_csv("results-data/daily_org_tweet_summary.csv")

